Load packages
library(dplyr)
library(ggplot2)
library(leaflet)
library(sf) # handle simple features objects
library(spdep) # areal analysis
library(proj4)
library(maptools)
library(geoR)
library(spgwr)
library(nlme)
library(pgirmess)
Data downloaded from https://github.com/coreysparks/data
#mort<-st_read("/Users/mf/Dropbox (University of Southern California)/Courses/PM569/Data/usdata_mort.shp")
#mort_sp <- sf:::as_Spatial(mort$geom)
# don't know projection so use readShapePoly
mort_sp <-readShapePoly("/Users/mf/Dropbox (University of Southern California)/Courses/PM569/Data/usdata_mort.shp")
library(rgeos) # use this to get centroid coordinates of the counties
mort_coords<-data.frame(gCentroid(mort_sp,byid=TRUE))
#mort.pal = colorNumeric(c('blue','purple','pink','yellow'),
# domain=mort$mortrate)
#mort %>%
# leaflet() %>%
# addProviderTiles('CartoDB.Positron') %>%
# addPolygons(fillColor=~mort.pal(mortrate), weight=.5, fillOpacity=.9,
# label=~paste0(NAME, ': ', poptot)) %>%
# addLegend("bottomleft", pal = mort.pal, values = ~mortrate, title = "County Mortality Rates", opacity = 1)
spplot(mort_sp,"mortrate", at=quantile(mort_sp$mortrate), col.regions=heat.colors(10), main="Spatial Distribution of US Mortality Rate")
Let’s use 2-NN as our starting place to look at the global Moran’s I for Mortality Rates (use lag plot)
mort_nb2<-knearneigh(coordinates(mort_sp), k=2)
mort_nb2<-knn2nb(mort_nb2)
mort_wt2<-nb2listw(mort_nb2, style="W")
mort_corrneigh<-sp.correlogram(mort_nb2, mort_sp$mortrate, order=10, method="I",zero.policy = TRUE)
plot(mort_corrneigh, main="Moran's I mortality rate")
# this takes a while to run
mort_correlog<-correlog((mort_coords),mort_sp$mortrate,method="Moran",nbclass=10)
plot(mort_correlog)
Look at a linear model and the spatial distribution of the residuals
mod_lm<-lm(scale(mortrate)~ppersonspo+p65plus+pwhite+pblack_1+phisp+punemp_1, data=mort_sp)
summary(mod_lm)
##
## Call:
## lm(formula = scale(mortrate) ~ ppersonspo + p65plus + pwhite +
## pblack_1 + phisp + punemp_1, data = mort_sp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5903 -0.4477 0.0251 0.4782 4.1662
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.3411 0.2122 -6.321 2.97e-10 ***
## ppersonspo 7.8135 0.3237 24.136 < 2e-16 ***
## p65plus -2.1017 0.3679 -5.713 1.22e-08 ***
## pwhite 0.4330 0.2103 2.059 0.0396 *
## pblack_1 1.2972 0.2081 6.232 5.23e-10 ***
## phisp -2.1099 0.1405 -15.021 < 2e-16 ***
## punemp_1 3.4119 0.7665 4.451 8.85e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7626 on 3060 degrees of freedom
## Multiple R-squared: 0.4196, Adjusted R-squared: 0.4185
## F-statistic: 368.7 on 6 and 3060 DF, p-value: < 2.2e-16
# map residuals
mort_sp$lm_resid<-mod_lm$residuals
spplot(mort_sp,"lm_resid", at=quantile(mort_sp$lm_resid), col.regions=heat.colors(10), main="Resids from OLS")
moran.test(mort_sp$lm_resid, listw=mort_wt2)
##
## Moran I test under randomisation
##
## data: mort_sp$lm_resid
## weights: mort_wt2
##
## Moran I statistic standard deviate = 25.491, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.4273265616 -0.0003261579 0.0002814597
mod_sar1 <- errorsarlm(scale(mortrate)~ppersonspo+p65plus+pwhite+pblack_1+phisp+punemp_1, data = mort_sp, listw=mort_wt2, zero.policy=T, tol.solve=1e-12)
summary(mod_sar1)
##
## Call:
## spatialreg::errorsarlm(formula = formula, data = data, listw = listw,
## na.action = na.action, Durbin = Durbin, etype = etype, method = method,
## quiet = quiet, zero.policy = zero.policy, interval = interval,
## tol.solve = tol.solve, trs = trs, control = control)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6838336 -0.3671099 0.0083942 0.3956529 4.1889756
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.038914 0.230723 0.1687 0.8661
## ppersonspo 4.969503 0.347972 14.2813 < 2.2e-16
## p65plus 0.400593 0.388857 1.0302 0.3029
## pwhite -1.163995 0.227124 -5.1249 2.976e-07
## pblack_1 0.098609 0.233750 0.4219 0.6731
## phisp -1.966691 0.178364 -11.0263 < 2.2e-16
## punemp_1 5.108868 0.750308 6.8090 9.826e-12
##
## Lambda: 0.48691, LR test value: 717.3, p-value: < 2.22e-16
## Asymptotic standard error: 0.015327
## z-value: 31.769, p-value: < 2.22e-16
## Wald statistic: 1009.3, p-value: < 2.22e-16
##
## Log likelihood: -3158.435 for error model
## ML residual variance (sigma squared): 0.41747, (sigma: 0.64612)
## Number of observations: 3067
## Number of parameters estimated: 9
## AIC: 6334.9, (AIC for lm: 7050.2)
mort_sp$res_sar1<-residuals(mod_sar1)
spplot(mort_sp,"res_sar1", at=quantile(mort_sp$res_sar1), col.regions=heat.colors(10), main="Resids from SAR error model")
moran.test(mort_sp$res_sar1, listw=mort_wt2)
##
## Moran I test under randomisation
##
## data: mort_sp$res_sar1
## weights: mort_wt2
##
## Moran I statistic standard deviate = -4.5154, p-value = 1
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.0760691917 -0.0003261579 0.0002813806
# MCMC Moran's test giving us 1000 Moran's I statistics (better when # polygons small)
moran_mc_res1<- moran.mc(mort_sp$res_sar1,mort_wt2, zero.policy=T,nsim=999)
dist999<- hist(moran_mc_res1$res,freq=TRUE,col="light blue",main="Permutation Test for Moran's I - 999 permutations",breaks=75)
lines(moran_mc_res1$statistic,max(dist999$counts),type="h",col="red",lwd=2)
mod_sar2 = lagsarlm(scale(mortrate)~ppersonspo+p65plus+pwhite+pblack_1+phisp+punemp_1, data = mort_sp, mort_wt2, zero.policy=T, tol.solve=1.0e-12)
summary(mod_sar2)
##
## Call:spatialreg::lagsarlm(formula = formula, data = data, listw = listw,
## na.action = na.action, Durbin = Durbin, type = type, method = method,
## quiet = quiet, zero.policy = zero.policy, interval = interval,
## tol.solve = tol.solve, trs = trs, control = control)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.466024 -0.373268 0.021276 0.392352 4.117610
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.34735 0.18235 -1.9049 0.05680
## ppersonspo 4.71672 0.28982 16.2748 < 2.2e-16
## p65plus -0.63477 0.31494 -2.0155 0.04385
## pwhite -0.42313 0.17993 -2.3516 0.01869
## pblack_1 0.17723 0.17956 0.9870 0.32363
## phisp -1.52923 0.12257 -12.4769 < 2.2e-16
## punemp_1 3.68583 0.65620 5.6169 1.944e-08
##
## Rho: 0.4247, LR test value: 737, p-value: < 2.22e-16
## Asymptotic standard error: 0.014702
## z-value: 28.888, p-value: < 2.22e-16
## Wald statistic: 834.51, p-value: < 2.22e-16
##
## Log likelihood: -3148.583 for lag model
## ML residual variance (sigma squared): 0.42507, (sigma: 0.65198)
## Number of observations: 3067
## Number of parameters estimated: 9
## AIC: 6315.2, (AIC for lm: 7050.2)
## LM test for residual autocorrelation
## test value: 21.042, p-value: 4.4935e-06
mort_sp$res_sar2<-residuals(mod_sar2)
spplot(mort_sp,"res_sar2", at=quantile(mort_sp$res_sar2), col.regions=heat.colors(10), main="Resids from SAR lag model")
moran.test(mort_sp$res_sar2, listw=mort_wt2)
##
## Moran I test under randomisation
##
## data: mort_sp$res_sar2
## weights: mort_wt2
##
## Moran I statistic standard deviate = -2.2222, p-value = 0.9869
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## -0.0376033285 -0.0003261579 0.0002813869
mort_nb2b<-knearneigh(coordinates(mort_sp), k=2)
mort_nb2b<-knn2nb(mort_nb2b)
mort_wt2b<-nb2listw(mort_nb2b, style="B")
mod_car <- spautolm(scale(mortrate)~ppersonspo+p65plus+pwhite+pblack_1+phisp+punemp_1,data=mort_sp, listw=mort_wt2b, zero.policy=T, tol.solve=1e-12,family="CAR")
summary(mod_car)
##
## Call: spatialreg::spautolm(formula = formula, data = data, listw = listw,
## na.action = na.action, family = family, method = method,
## verbose = verbose, trs = trs, interval = interval, zero.policy = zero.policy,
## tol.solve = tol.solve, llprof = llprof, control = control)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.685178 -0.363960 0.010699 0.369970 4.095396
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.72672 0.23611 3.0778 0.002085
## ppersonspo 3.72883 0.35818 10.4104 < 2.2e-16
## p65plus 1.62690 0.39093 4.1616 3.160e-05
## pwhite -1.94055 0.22970 -8.4481 < 2.2e-16
## pblack_1 -0.72090 0.24546 -2.9370 0.003314
## phisp -1.75413 0.20580 -8.5235 < 2.2e-16
## punemp_1 5.78943 0.73706 7.8547 3.997e-15
##
## Lambda: 0.46282 LR test value: 1001.7 p-value: < 2.22e-16
## Numerical Hessian standard error of lambda: 0.0052053
##
## Log likelihood: -3016.233
## ML residual variance (sigma squared): 0.32392, (sigma: 0.56914)
## Number of observations: 3067
## Number of parameters estimated: 9
## AIC: 6050.5
mort_sp$res_car<-residuals(mod_car)
spplot(mort_sp,"res_car", at=quantile(mort_sp$res_car), col.regions=heat.colors(10), main="Resids from CAR model")
res_car <- residuals(mod_car)
# Moran's test
moran_resc<-moran.test(res_car, listw=mort_wt2b)
nc <- readShapeSpatial(system.file("shapes/sids.shp", package = "spData")[1])
nc$lat<-coordinates(nc)[,2]
nc$lon<-coordinates(nc)[,1]
sids_rate = (nc$SID79)/nc$BIR79
nwbirth_rate = (nc$NWBIR79)/nc$BIR79
spatial<-corSpatial(1,form=~lon+lat,type="gaussian") # like choosing a gaussian covariance function
scor<-Initialize(spatial, as(nc, "data.frame")[,c("lon","lat")], nugget=FALSE)
mod_mixed<-lme(sids_rate ~ nwbirth_rate, random= ~1|CNTY_ID, data=as(nc,"data.frame"), correlation=scor, method="ML", control=lmeControl(singular.ok=TRUE, returnObject = TRUE))
summary(mod_mixed)
## Linear mixed-effects model fit by maximum likelihood
## Data: as(nc, "data.frame")
## AIC BIC logLik
## -1053.397 -1040.371 531.6986
##
## Random effects:
## Formula: ~1 | CNTY_ID
## (Intercept) Residual
## StdDev: 9.280348e-05 0.001183845
##
## Correlation Structure: Gaussian spatial correlation
## Formula: ~lon + lat | CNTY_ID
## Parameter estimate(s):
## range
## 0.02882469
## Fixed effects: sids_rate ~ nwbirth_rate
## Value Std.Error DF t-value p-value
## (Intercept) 0.001727026 0.0002155968 98 8.010445 0.0000
## nwbirth_rate 0.001004548 0.0005770555 98 1.740817 0.0849
## Correlation:
## (Intr)
## nwbirth_rate -0.831
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.93110059 -0.61538259 -0.01740905 0.43223963 3.22018603
##
## Number of Observations: 100
## Number of Groups: 100
res_mixed<-residuals(mod_mixed, type="response")
nc$res_mixed<-res_mixed
spplot(nc,"res_mixed", at=quantile(nc$res_mixed), col.regions=heat.colors(10), main="Resids from Mixed Effects model")
# Moran's I of residuals
sids_nb <- poly2nb(nc, queen=T)
sids_mat_w <- nb2listw(sids_nb, style="B", zero.policy=TRUE)
moran.test(nc$res_mixed, listw=sids_mat_w)
##
## Moran I test under randomisation
##
## data: nc$res_mixed
## weights: sids_mat_w
##
## Moran I statistic standard deviate = 1.8638, p-value = 0.03117
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.104349163 -0.010101010 0.003770746
# compare SIDS mixed effects with SAR model
sids_nb <- poly2nb(nc, queen=T)
sids_mat_w <- nb2listw(sids_nb, style="W", zero.policy=TRUE)
mod_sar_nc <- errorsarlm(sids_rate ~ nwbirth_rate, data = nc, listw=sids_mat_w, zero.policy=T, tol.solve=1e-12)
summary(mod_sar_nc)
##
## Call:
## spatialreg::errorsarlm(formula = formula, data = data, listw = listw,
## na.action = na.action, Durbin = Durbin, etype = etype, method = method,
## quiet = quiet, zero.policy = zero.policy, interval = interval,
## tol.solve = tol.solve, trs = trs, control = control)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2321e-03 -7.1518e-04 -1.9593e-05 5.0607e-04 3.9720e-03
##
## Type: error
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.00169814 0.00026671 6.3670 1.927e-10
## nwbirth_rate 0.00108402 0.00069006 1.5709 0.1162
##
## Lambda: 0.27857, LR test value: 3.8702, p-value: 0.049149
## Asymptotic standard error: 0.13102
## z-value: 2.1261, p-value: 0.033491
## Wald statistic: 4.5205, p-value: 0.033491
##
## Log likelihood: 533.6337 for error model
## ML residual variance (sigma squared): 1.3327e-06, (sigma: 0.0011544)
## Number of observations: 100
## Number of parameters estimated: 4
## AIC: -1059.3, (AIC for lm: -1057.4)
moran.test(residuals(mod_sar_nc),sids_mat_w)
##
## Moran I test under randomisation
##
## data: residuals(mod_sar_nc)
## weights: sids_mat_w
##
## Moran I statistic standard deviate = 0.23452, p-value = 0.4073
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.005034967 -0.010101010 0.004165434
#compare to lm
sids_lm<-lm(sids_rate ~ nwbirth_rate,data=nc)
summary(sids_lm)
##
## Call:
## lm(formula = sids_rate ~ nwbirth_rate, data = nc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0023002 -0.0007330 -0.0000207 0.0005148 0.0038356
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0017270 0.0002156 8.010 2.41e-12 ***
## nwbirth_rate 0.0010045 0.0005771 1.741 0.0849 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0012 on 98 degrees of freedom
## Multiple R-squared: 0.03, Adjusted R-squared: 0.0201
## F-statistic: 3.03 on 1 and 98 DF, p-value: 0.08485
moran.test(sids_lm$residuals, listw=sids_mat_w)
##
## Moran I test under randomisation
##
## data: sids_lm$residuals
## weights: sids_mat_w
##
## Moran I statistic standard deviate = 2.2233, p-value = 0.0131
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.13367035 -0.01010101 0.00418154
# Freeman-Tukey transformation of the response to make normal distribution
sids_ft = sqrt(1000)*(sqrt(nc$SID79/nc$BIR79) + sqrt((nc$SID79+1)/nc$BIR79))
hist(sids_ft)
# the distribution of the transformed values is more symmetric and there is only one outlier.
nwbirth_rate = (nc$NWBIR79)/nc$BIR79
hist(nwbirth_rate)
# Freeman-Tukey transformation of the covariate to make normal distribution
nwbirth_ft = sqrt(1000)*(sqrt(nc$NWBIR79/nc$BIR79) + sqrt((nc$NWBIR79+1)/nc$BIR79))
hist(nwbirth_ft)
# GWR takes two steps, 1) estimate bandwidth
sids_bwG<-gwr.sel(sids_ft~nwbirth_ft, data=nc, gweight=gwr.Gauss, verbose=T, adapt=T)
## Adaptive q: 0.381966 CV score: 74.61564
## Adaptive q: 0.618034 CV score: 74.3972
## Adaptive q: 0.763932 CV score: 74.37443
## Adaptive q: 0.7297418 CV score: 74.37428
## Adaptive q: 0.7453343 CV score: 74.37193
## Adaptive q: 0.7465778 CV score: 74.3722
## Adaptive q: 0.7409741 CV score: 74.37096
## Adaptive q: 0.7366838 CV score: 74.37196
## Adaptive q: 0.7410372 CV score: 74.37097
## Adaptive q: 0.7393354 CV score: 74.37098
## Adaptive q: 0.7402008 CV score: 74.37078
## Adaptive q: 0.7398703 CV score: 74.37078
## Adaptive q: 0.7400466 CV score: 74.37075
## Adaptive q: 0.7400059 CV score: 74.37074
## Adaptive q: 0.7399541 CV score: 74.37075
## Adaptive q: 0.7400059 CV score: 74.37074
# 2) fit model
mod_gwr <- gwr(sids_ft ~ nwbirth_ft, data = nc, bandwidth=sids_bwG, gweight=gwr.Gauss, hatmatrix = TRUE)
# if you have a pre-defined bandwidth, you only need to do step 2.
mod_gwr
## Call:
## gwr(formula = sids_ft ~ nwbirth_ft, data = nc, bandwidth = sids_bwG,
## gweight = gwr.Gauss, hatmatrix = TRUE)
## Kernel function: gwr.Gauss
## Fixed bandwidth: 0.7400059
## Summary of GWR coefficient estimates at data points:
## Min. 1st Qu. Median 3rd Qu. Max.
## X.Intercept. 1.18515404 2.00169153 2.32139225 2.49758078 2.78788348
## nwbirth_ft -0.00057442 0.01401919 0.01906047 0.02536294 0.04821502
## Global
## X.Intercept. 2.5847
## nwbirth_ft 0.0099
## Number of data points: 100
## Effective number of parameters (residual: 2traceS - traceS'S): 16.08104
## Effective degrees of freedom (residual: 2traceS - traceS'S): 83.91896
## Sigma (residual: 2traceS - traceS'S): 0.8534923
## Effective number of parameters (model: traceS): 11.96186
## Effective degrees of freedom (model: traceS): 88.03814
## Sigma (model: traceS): 0.8332863
## Sigma (ML): 0.7818612
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 264.7026
## AIC (GWR p. 96, eq. 4.22): 246.534
## Residual sum of squares: 61.13069
## Quasi-global R2: 0.1612005
# Localized Coefficients from GWR
coef <- mod_gwr$SDF$nwbirth_ft
nc$gwr_coef<-coef
spplot(nc,"gwr_coef", at=quantile(nc$gwr_coef), col.regions=heat.colors(10), main="GWR local beta coefficient estimates (non-white births)")
# plot local R^2
localR2<-mod_gwr$SDF$localR2
nc$gwr_localR2<-localR2
spplot(nc,"gwr_localR2", at=quantile(nc$gwr_localR2), col.regions=heat.colors(10), main="GWR local R2")
# Residuals of GWR to see if there is residual spatial autocorrelation
gwr_res <- mod_gwr$SDF$gwr.e
nc$gwr_res<-gwr_res
spplot(nc,"gwr_res", at=quantile(nc$gwr_res), col.regions=heat.colors(10), main="GWR Residuals")
# Test residual autocorrelation with Moran's
moran.test(nc$gwr_res, listw=sids_mat_w)
##
## Moran I test under randomisation
##
## data: nc$gwr_res
## weights: sids_mat_w
##
## Moran I statistic standard deviate = 0.56711, p-value = 0.2853
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.02660716 -0.01010101 0.00418982